Imports

In [1]:
import glob
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from moviepy.editor import VideoFileClip
from IPython.display import HTML
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Camera calibration

In [2]:
# Pattern size handed to cv2.findChessboardCorners; its two entries are also
# multiplied together to size the grid of object points used for calibration.
CHESSBOARD_SIZE = (9, 6)

def calibateCamera(pattern='camera_cal/calibration*.jpg', board_size=None):
    """
    Calibrate the camera from chessboard photographs.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the calibration images.
    board_size : tuple of int, optional
        Pattern size passed to cv2.findChessboardCorners.
        Defaults to the notebook-level CHESSBOARD_SIZE.

    Returns
    -------
    The result of cv2.calibrateCamera, unchanged.

    Raises
    ------
    ValueError
        If no image matching ``pattern`` yields a detected chessboard.
    """
    if board_size is None:
        board_size = CHESSBOARD_SIZE
    cols, rows = board_size

    # Reference grid (0,0,0), (1,0,0), ... shared by every calibration image
    objp = np.zeros((cols * rows, 3), np.float32)
    objp[:, :2] = np.mgrid[0:cols, 0:rows].T.reshape(-1, 2)

    objpoints = []
    imgpoints = []
    img_size = None

    for image_name in glob.glob(pattern):
        img = cv2.imread(image_name)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            continue

        if img_size is None:
            # (width, height) taken from the first readable image
            img_size = img.shape[1::-1]

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        ret, corners = cv2.findChessboardCorners(gray, board_size, None)

        if ret:
            imgpoints.append(corners)
            objpoints.append(objp)

    if not objpoints:
        raise ValueError(
            'No chessboard corners found in images matching {!r}'.format(pattern))

    return cv2.calibrateCamera(objpoints, imgpoints, img_size, None, None)

# Run the calibration once; mtx and dist are reused by the cv2.undistort calls
# in later cells. The first return value is discarded.
_, mtx, dist, rvecs, tvecs = calibateCamera()
In [3]:
# Visual check of the calibration: original vs. undistorted chessboard images
images = glob.glob('camera_cal/calibration*.jpg')
for image_name in images[:3]:
    img = cv2.imread(image_name)
    dst = cv2.undistort(img, mtx, dist, None, mtx)

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
    # cv2.imread yields BGR data while matplotlib expects RGB, so convert for display
    ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax1.set_title('Original Image', fontsize=30)
    ax2.imshow(cv2.cvtColor(dst, cv2.COLOR_BGR2RGB))
    ax2.set_title('Undistorted Image', fontsize=30)

Color channels

In [4]:
def _show_channel_threshold(channel, low, channel_title, binary_title):
    """
    Plot a single-channel image next to its binary threshold.

    Pixels with ``low < value <= 255`` are set to 1 in the returned mask,
    all others stay 0. The mask has the same shape and dtype as ``channel``.
    """
    binary = np.zeros_like(channel)
    binary[(channel > low) & (channel <= 255)] = 1

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
    ax1.imshow(channel, cmap='gray')
    ax1.set_title(channel_title, fontsize=30)
    ax2.imshow(binary, cmap='gray')
    ax2.set_title(binary_title, fontsize=30)
    return binary


test_image = cv2.imread('test_images/test5.jpg')
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)

# gray
gray = cv2.cvtColor(test_image, cv2.COLOR_RGB2GRAY)
binary_gray = _show_channel_threshold(gray, 180, 'Gray', 'Binary gray')

# red
r_channel = test_image[:,:,0]
binary_r_channel = _show_channel_threshold(r_channel, 200, 'R channel', 'Binary R channel')

# saturation
hls = cv2.cvtColor(test_image, cv2.COLOR_RGB2HLS)
s_channel = hls[:,:,2]
binary_s_channel = _show_channel_threshold(s_channel, 200, 'S channel', 'Binary S channel')
Out[4]:
<matplotlib.text.Text at 0x7fcd00c01ef0>

As we can see, the Red channel and the Saturation channel highlight the lane lines better than the grayscale image.

Thresholding

In [5]:
def sobel_thresh(img, orientation='x', kernel=3, thresh=(0, 255)):
    """
    Sobel threshold

    Takes the absolute Sobel derivative of ``img`` along ``orientation``
    ('x', anything else means 'y'), rescales it to 0..255 and returns a uint8
    mask that is 1 where ``thresh[0] <= value <= thresh[1]``.

    An image without any gradient produces a rescaled derivative of all zeros
    instead of dividing by zero.
    """
    p, q = (1, 0) if orientation == 'x' else (0, 1)
    abs_sobel = np.absolute(cv2.Sobel(img, cv2.CV_64F, p, q, ksize=kernel))

    peak = np.max(abs_sobel)
    if peak > 0:
        scaled_sobel = np.uint8(255*abs_sobel/peak)
    else:
        # flat input: avoid 0/0 -> NaN -> undefined uint8 cast
        scaled_sobel = np.zeros(abs_sobel.shape, dtype=np.uint8)

    output = np.zeros_like(scaled_sobel)
    output[(scaled_sobel >= thresh[0]) & (scaled_sobel <= thresh[1])] = 1
    return output

def mag_thresh(img, kernel=3, thresh=(0, 255)):
    """
    Magnitude of the gradient threshold

    Combines the x and y Sobel derivatives of ``img`` into a gradient
    magnitude, rescales it to 0..255 and returns a uint8 mask that is 1 where
    ``thresh[0] <= value <= thresh[1]``.

    An image without any gradient produces a rescaled magnitude of all zeros
    instead of dividing by zero.
    """
    # Take both Sobel x and y gradients
    sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=kernel)
    sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=kernel)
    # Calculate the gradient magnitude
    gradmag = np.sqrt(sobelx**2 + sobely**2)

    # Rescale to 8 bit, guarding against a flat image (max magnitude of 0)
    peak = np.max(gradmag)
    if peak > 0:
        scale_factor = peak/255
        gradmag = (gradmag/scale_factor).astype(np.uint8)
    else:
        gradmag = np.zeros(gradmag.shape, dtype=np.uint8)

    # Create a binary image of ones where threshold is met, zeros otherwise
    binary_output = np.zeros_like(gradmag)
    binary_output[(gradmag >= thresh[0]) & (gradmag <= thresh[1])] = 1

    # Return the binary image
    return binary_output

def dir_threshold(img, kernel=3, thresh=(0, np.pi/2)):
    """
    Direction of the gradient threshold

    Returns an array with the dtype of the computed direction that holds 1
    where the absolute gradient direction lies within
    ``thresh[0] <= direction <= thresh[1]`` and 0 elsewhere.
    """
    # Absolute x and y derivatives
    grad_x = np.absolute(cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=kernel))
    grad_y = np.absolute(cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=kernel))

    # Direction of the gradient computed from the absolute derivatives
    direction = np.arctan2(grad_y, grad_x)

    # Mark the pixels whose direction falls inside the requested band
    within_band = (direction >= thresh[0]) & (direction <= thresh[1])
    return within_band.astype(direction.dtype)

def combined_gradient_thresholds(img):
    """
    Combining different gradient thresholds

    A pixel is selected when the x-gradient, y-gradient and direction masks
    all agree, or when the magnitude mask alone is set.
    """
    kernel = 31
    strength_range = (5, 140)

    gradx = sobel_thresh(img, orientation='x', kernel=kernel, thresh=strength_range)
    grady = sobel_thresh(img, orientation='y', kernel=kernel, thresh=strength_range)
    mag_binary = mag_thresh(img, kernel=kernel, thresh=strength_range)
    dir_binary = dir_threshold(img, kernel=kernel, thresh=(0.5, 1.4))

    # `&` binds tighter than `|`, so the grouping is (x & y & direction) | magnitude
    all_gradients_agree = (gradx == 1) & (grady == 1) & (dir_binary == 1)
    selected = all_gradients_agree | (mag_binary == 1)

    combined = np.zeros_like(gradx)
    combined[selected] = 1
    return combined

def threshold_pipeline(image):
    """
    All thresholds together

    ``image`` is indexed as an RGB image (channel 0 = red, 1 = green) and is
    converted with cv2.COLOR_RGB2HLS to obtain the saturation channel.

    Returns a single-channel mask with the dtype of ``image`` that is 1 where
    the combined gradient threshold of the saturation channel AND the
    saturation, red and green thresholds (value > 80) are all met.
    """
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    s_channel = hls[:,:,2]

    # gradient threshold
    gradient_thresh = combined_gradient_thresholds(s_channel)

    # red threshold
    # Masks are allocated from the channel itself so the function does not
    # depend on a notebook-level array of a particular shape.
    r_channel = image[:,:,0]
    binary_r_channel = np.zeros_like(r_channel)
    binary_r_channel[(r_channel > 80) & (r_channel <= 255)] = 1

    # green threshold (channel index 1; index 0 is red)
    g_channel = image[:,:,1]
    binary_g_channel = np.zeros_like(g_channel)
    binary_g_channel[(g_channel > 80) & (g_channel <= 255)] = 1

    # saturation threshold
    binary_s_channel = np.zeros_like(s_channel)
    binary_s_channel[(s_channel > 80) & (s_channel <= 255)] = 1

    # combining everything together
    combined = np.zeros_like(r_channel)
    combined[(gradient_thresh == 1) & (binary_s_channel == 1) & ((binary_r_channel == 1) & (binary_g_channel == 1))] = 1

    return combined

# Apply the complete threshold pipeline to every test image and plot the outcome
images = glob.glob('test_images/*.jpg')
for image_name in images:
    image = cv2.cvtColor(cv2.imread(image_name), cv2.COLOR_BGR2RGB)
    output = threshold_pipeline(image)

    panels = (
        (image, 'Original image'),
        (output, 'Result'),
    )
    f, axes = plt.subplots(1, 2, figsize=(20,10))
    for ax, (picture, title) in zip(axes, panels):
        ax.imshow(picture, cmap='gray')
        ax.set_title(title, fontsize=30)

Perspective transform

In [6]:
def wrap_perspective(image, src=None, dst=None):
    """
    Makes a "bird-eye view" image

    Parameters
    ----------
    image : array
        Image to warp; the output keeps its (width, height).
    src, dst : sequence of four (x, y) points, optional
        Source quadrilateral in ``image`` and the quadrilateral it is mapped
        to. When omitted, the notebook's original hard-coded points are used.

    Returns
    -------
    (warped, Minv) : the warped image and the inverse perspective matrix
    (mapping ``dst`` back to ``src``).
    """
    img_size = image.shape[1::-1]

    if src is None:
        src = [
            [270,670],
            [594,450],
            [689,450],
            [1030,670]
        ]
    if dst is None:
        dst = [
            [300,730],
            [300,0],
            [900,0],
            [900,730]
        ]
    # cv2.getPerspectiveTransform is given float32 point arrays, as before
    src = np.float32(src)
    dst = np.float32(dst)

    M = cv2.getPerspectiveTransform(src, dst)
    Minv = cv2.getPerspectiveTransform(dst, src)
    warped = cv2.warpPerspective(image, M, img_size, flags=cv2.INTER_LINEAR)
    return warped, Minv


# Undistort and warp the first few test images to inspect the transform
images = glob.glob('test_images/*.jpg')
for image_name in images[:5]:
    image = cv2.cvtColor(cv2.imread(image_name), cv2.COLOR_BGR2RGB)
    undist_image = cv2.undistort(image, mtx, dist, None, mtx)
    output, _ = wrap_perspective(undist_image)

    panels = (
        (image, 'Original image'),
        (undist_image, 'Undistorted image'),
        (output, 'Wrapped image'),
    )
    f, axes = plt.subplots(1, 3, figsize=(20,10))
    for ax, (picture, title) in zip(axes, panels):
        ax.imshow(picture, cmap='gray')
        ax.set_title(title, fontsize=30)

Processing Each Image

In [7]:
class LaneFinder():
    """
    Stateful lane-line detector for warped ("bird-eye view") binary images.

    The first call to ``detect_lines`` locates both lines with a sliding-window
    search; later calls only look within ``margin`` pixels of the polynomials
    fitted on the previous frame.
    """
    def __init__(self):
        self.image_size = None       # (width, height) of the processed frames
        self.prev_left_fit = None    # polynomial coefficients from the previous frame
        self.prev_right_fit = None
        self.prev_left_fitx = None   # previous polynomials evaluated at every image row
        self.prev_right_fitx = None

        self.margin = 150    # Set the width of the windows +/- margin
        self.ym_per_pix = 30/720  # meters per pixel in y dimension
        self.xm_per_pix = 3.7/700 # meters per pixel in x dimension

    def set_image_size(self, image_size):
        """
        Remember the frame size on first use; later calls must pass the same size.
        """
        if self.image_size is None:
            self.image_size = image_size
        else:
            assert(self.image_size == image_size)

    @staticmethod
    def _poly_x(fit, ys):
        """Evaluate the second order polynomial x = fit[0]*y**2 + fit[1]*y + fit[2]."""
        return fit[0]*ys**2 + fit[1]*ys + fit[2]

    def _near_fit(self, fit, xs, ys):
        """Boolean mask of the pixels lying strictly within +/- margin of a fitted line."""
        center = self._poly_x(fit, ys)
        return (xs > center - self.margin) & (xs < center + self.margin)

    def _curve_radius(self, ys, xs, y_eval):
        """Radius of curvature at pixel row y_eval, computed from a fit in world space."""
        fit_cr = np.polyfit(ys * self.ym_per_pix, xs * self.xm_per_pix, 2)
        return ((1 + (2*fit_cr[0]*y_eval*self.ym_per_pix + fit_cr[1])**2)**1.5) / np.absolute(2*fit_cr[0])

    def detect_lines(self, binary_warped):
        """
        The actual lane finding lives here.

        Parameters
        ----------
        binary_warped : 2-D array
            Warped binary image; nonzero pixels are lane-line candidates.

        Returns
        -------
        (left_curverad, right_curverad, offset_center, out_img, (ploty, left_fitx, right_fitx))
            Curvature radii of both lines, offset of the lane center from the
            image center, a diagnostic image, and the fitted x positions of
            both lines for every image row in ``ploty``.

        Raises
        ------
        ValueError
            If the initial sliding-window search finds no pixels for a line.
        """
        height = binary_warped.shape[0]
        # Fall back to the frame's own width when set_image_size was never called
        frame_width = self.image_size[0] if self.image_size is not None else binary_warped.shape[1]

        # Create an output image to draw on and  visualize the result
        out_img = np.dstack((binary_warped, binary_warped, binary_warped))*255

        # Identify the x and y positions of all nonzero pixels in the image
        nonzero = binary_warped.nonzero()
        nonzeroy = np.array(nonzero[0])
        nonzerox = np.array(nonzero[1])

        ploty = np.linspace(0, height-1, height)

        if self.prev_left_fit is not None and self.prev_right_fit is not None:
            # if we already have previous frame, skip sliding windows
            left_lane_inds = self._near_fit(self.prev_left_fit, nonzerox, nonzeroy)
            right_lane_inds = self._near_fit(self.prev_right_fit, nonzerox, nonzeroy)

            # Again, extract left and right line pixel positions
            leftx = nonzerox[left_lane_inds]
            lefty = nonzeroy[left_lane_inds]
            rightx = nonzerox[right_lane_inds]
            righty = nonzeroy[right_lane_inds]

            # If one line isn't found, use another line data to restore it
            lane_width = self.prev_right_fitx[-1] - self.prev_left_fitx[-1]
            if len(leftx) > 0 and len(rightx) == 0:
                rightx = leftx + lane_width
                righty = np.copy(lefty)
            elif len(leftx) == 0 and len(rightx) > 0:
                leftx = rightx - lane_width
                lefty = np.copy(righty)
            elif len(leftx) == 0 and len(rightx) == 0:
                # Neither line found: resample the previous frame's curves so
                # the frame reuses the previous solution instead of aborting.
                leftx = np.copy(self.prev_left_fitx)
                lefty = np.copy(ploty)
                rightx = np.copy(self.prev_right_fitx)
                righty = np.copy(ploty)

            # Fit a second order polynomial to each
            left_fit = np.polyfit(lefty, leftx, 2)
            right_fit = np.polyfit(righty, rightx, 2)

            # Generate x and y values for plotting
            left_fitx = self._poly_x(left_fit, ploty)
            right_fitx = self._poly_x(right_fit, ploty)

            # Sanity check. If it fails, use result from the previous frame
            if right_fitx[0] < left_fitx[0] or right_fitx[0] < 0 or right_fitx[0] > frame_width or left_fitx[0] < 0 or left_fitx[0] > frame_width:
                right_fit = self.prev_right_fit
                right_fitx = self.prev_right_fitx
                left_fit = self.prev_left_fit
                left_fitx = self.prev_left_fitx

            # Generate a polygon to illustrate the search window area
            # And recast the x and y points into usable format for cv2.fillPoly()
            left_line_window1 = np.array([np.transpose(np.vstack([self.prev_left_fitx-self.margin, ploty]))])
            left_line_window2 = np.array([np.flipud(np.transpose(np.vstack([self.prev_left_fitx+self.margin, ploty])))])
            left_line_pts = np.hstack((left_line_window1, left_line_window2))
            right_line_window1 = np.array([np.transpose(np.vstack([self.prev_right_fitx-self.margin, ploty]))])
            right_line_window2 = np.array([np.flipud(np.transpose(np.vstack([self.prev_right_fitx+self.margin, ploty])))])
            right_line_pts = np.hstack((right_line_window1, right_line_window2))

            # Draw the search area onto a blank image and blend it in
            window_img = np.zeros_like(out_img)
            cv2.fillPoly(window_img, np.int_([left_line_pts]), (0,255, 0))
            cv2.fillPoly(window_img, np.int_([right_line_pts]), (0,255, 0))
            out_img = cv2.addWeighted(out_img, 1, window_img, 0.3, 0)

        else:
            # first frame, use sliding windows

            # Take a histogram of the bottom half of the image
            histogram = np.sum(binary_warped[height//2:,:], axis=0)
            # Find the peak of the left and right halves of the histogram
            # These will be the starting point for the left and right lines
            # (plain int() is used because the np.int alias was removed from NumPy)
            midpoint = histogram.shape[0]//2
            leftx_base = int(np.argmax(histogram[:midpoint]))
            rightx_base = int(np.argmax(histogram[midpoint:])) + midpoint

            # Choose the number of sliding windows
            nwindows = 9
            # Set height of windows
            window_height = height//nwindows

            # Current positions to be updated for each window
            leftx_current = leftx_base
            rightx_current = rightx_base

            # Set minimum number of pixels found to recenter window
            minpix = 50

            # Create empty lists to receive left and right lane pixel indices
            left_lane_inds = []
            right_lane_inds = []

            # Step through the windows one by one
            for window in range(nwindows):
                # Identify window boundaries in x and y (and right and left)
                win_y_low = height - (window+1)*window_height
                win_y_high = height - window*window_height
                win_xleft_low = leftx_current - self.margin
                win_xleft_high = leftx_current + self.margin
                win_xright_low = rightx_current - self.margin
                win_xright_high = rightx_current + self.margin

                # Draw the windows on the visualization image
                cv2.rectangle(out_img,(win_xleft_low,win_y_low),(win_xleft_high,win_y_high),(0,255,0), 2)
                cv2.rectangle(out_img,(win_xright_low,win_y_low),(win_xright_high,win_y_high),(0,255,0), 2)

                # Identify the nonzero pixels in x and y within the window
                in_rows = (nonzeroy >= win_y_low) & (nonzeroy < win_y_high)
                good_left_inds = (in_rows & (nonzerox >= win_xleft_low) & (nonzerox < win_xleft_high)).nonzero()[0]
                good_right_inds = (in_rows & (nonzerox >= win_xright_low) & (nonzerox < win_xright_high)).nonzero()[0]

                # Append these indices to the lists
                left_lane_inds.append(good_left_inds)
                right_lane_inds.append(good_right_inds)

                # If you found > minpix pixels, recenter next window on their mean position
                if len(good_left_inds) > minpix:
                    leftx_current = int(np.mean(nonzerox[good_left_inds]))
                if len(good_right_inds) > minpix:
                    rightx_current = int(np.mean(nonzerox[good_right_inds]))

            # Concatenate the arrays of indices
            left_lane_inds = np.concatenate(left_lane_inds)
            right_lane_inds = np.concatenate(right_lane_inds)

            # Extract left and right line pixel positions
            leftx = nonzerox[left_lane_inds]
            lefty = nonzeroy[left_lane_inds]
            rightx = nonzerox[right_lane_inds]
            righty = nonzeroy[right_lane_inds]

            if len(leftx) == 0 or len(rightx) == 0:
                # No previous frame to fall back on; np.polyfit would fail on
                # empty input with a far less helpful message.
                raise ValueError('No lane pixels found by the sliding-window search')

            # Fit a second order polynomial to each
            left_fit = np.polyfit(lefty, leftx, 2)
            right_fit = np.polyfit(righty, rightx, 2)

            # Generate x and y values for plotting
            left_fitx = self._poly_x(left_fit, ploty)
            right_fitx = self._poly_x(right_fit, ploty)

        # Save current solution for next frame
        self.prev_left_fit = left_fit
        self.prev_right_fit = right_fit
        self.prev_left_fitx = left_fitx
        self.prev_right_fitx = right_fitx

        # highlight the lines
        out_img[nonzeroy[left_lane_inds], nonzerox[left_lane_inds]] = [255, 0, 0]
        out_img[nonzeroy[right_lane_inds], nonzerox[right_lane_inds]] = [0, 0, 255]

        # calculate curvatures from polynomials fitted in world space,
        # evaluated at the bottom row of the image
        y_eval = np.max(ploty)
        left_curverad = self._curve_radius(lefty, leftx, y_eval)
        right_curverad = self._curve_radius(righty, rightx, y_eval)

        # calculate offset from the lane center
        lane_center = left_fitx[-1] + ((right_fitx[-1] - left_fitx[-1])/2.0)
        camera_center = frame_width/2.0
        offset_center = (lane_center - camera_center) * self.xm_per_pix

        return left_curverad, right_curverad, offset_center, out_img, (ploty, left_fitx, right_fitx)
In [8]:
def draw_line_detection(image, Minv, ploty, left_fitx, right_fitx):
    """
    Highlight detected lane

    Fills the area between the two fitted lines on a blank warped canvas,
    warps that canvas with ``Minv`` and blends it onto ``image``.
    """
    # Blank three-channel canvas matching the first channel's height and width
    rows, cols = image[:,:,0].shape
    overlay = np.zeros((rows, cols, 3), dtype=np.uint8)

    # Polygon outline: down the left line, then back up the right line
    left_edge = np.column_stack((left_fitx, ploty))
    right_edge = np.column_stack((right_fitx, ploty))[::-1]
    outline = np.concatenate((left_edge, right_edge))[np.newaxis]

    # Paint the lane area in green on the warped canvas
    cv2.fillPoly(overlay, np.int_([outline]), (0,255, 0))

    # Bring the canvas back to the original perspective and blend it in
    unwarped = cv2.warpPerspective(overlay, Minv, (image.shape[1], image.shape[0]))
    return cv2.addWeighted(image, 1, unwarped, 0.3, 0)
In [9]:
def make_pipeline(debug=True):
    """
    Image processing pipeline factory.
    Put debug to False to remove diagnostic info.

    Returns a function that takes one RGB frame and returns the annotated
    frame. Each factory call creates its own LaneFinder, so the returned
    function carries state from frame to frame.
    """
    lane_finder = LaneFinder()

    def pipeline(image):
        """
        Actual pipeline.
        """
        # Use the size of the frame being processed, not a notebook global
        lane_finder.set_image_size(image.shape[1::-1])

        # Threshold the undistorted frame so the detected pixels line up with
        # the undistorted image the lane is later drawn on
        undist_image = cv2.undistort(image, mtx, dist, None, mtx)
        theshold = threshold_pipeline(undist_image)

        wrapped, Minv = wrap_perspective(theshold)

        left_curverad, right_curverad, offset_center, out_image, plot_data = lane_finder.detect_lines(wrapped)
        center_curverad = (left_curverad + right_curverad) / 2.0

        detected = draw_line_detection(undist_image, Minv, *plot_data)

        curvature_text = 'Curvature: {0:.2f} m'.format(center_curverad)
        offset_center_text = 'Offset: {0:.2f} m'.format(offset_center)

        font = cv2.FONT_HERSHEY_COMPLEX
        if debug:
            # 1920x1080 dashboard: annotated frame top-left, threshold and
            # lane-search views on the right, text at the bottom
            result = np.zeros((1080, 1920, 3), dtype=np.uint8)
            if detected.shape[:2] != (720, 1280):
                # the dashboard slot is fixed at 1280x720
                detected = cv2.resize(detected, (1280, 720), interpolation=cv2.INTER_AREA)
            result[0:720, 0:1280] = detected
            result[0:480, 1280:1920] = cv2.resize(np.dstack((theshold,) * 3) * 255, (640,480), interpolation=cv2.INTER_AREA)
            result[480:960, 1280:1920] = cv2.resize(out_image, (640,480), interpolation=cv2.INTER_AREA)

            text_canvas = np.zeros((360, 1280, 3), dtype=np.uint8)
            cv2.putText(text_canvas, curvature_text, (30, 60), font, 1.5, (255,255,255), 2)
            cv2.putText(text_canvas, offset_center_text, (30, 120), font, 1.5, (255,255,255), 2)
            result[720:1080, 0:1280] = text_canvas
        else:
            # Blend the text onto the frame that already shows the detected lane
            canvas = np.zeros_like(detected, dtype=np.uint8)
            cv2.putText(canvas, curvature_text, (30, 60), font, 1.5, (255,0,0), 2)
            cv2.putText(canvas, offset_center_text, (30, 120), font, 1.5, (255,0,0), 2)
            result = cv2.addWeighted(detected, 1, canvas, 0.3, 0)

        return result
    return pipeline

Generate video

In [10]:
# Run a fresh pipeline (with the debug dashboard enabled) over every frame of
# the project video and write the result, without audio, to output.mp4.
clip1 = VideoFileClip('project_video.mp4')
white_clip = clip1.fl_image(make_pipeline(debug=True))
white_clip.write_videofile('output.mp4', audio=False, progress_bar=False)
[MoviePy] >>>> Building video output.mp4
[MoviePy] Writing video output.mp4
[MoviePy] Done.
[MoviePy] >>>> Video ready: output.mp4 

In [11]:
# Embed the generated video in the notebook output
video_markup = """
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format('output.mp4')
HTML(video_markup)
Out[11]: